gvc_agora_opentargets

Setup environment

library(tidyverse)
library(ggformula)
library(janitor)
library(skimr)
library(broom)
library(readxl)
library(jsonlite)

library(gprofiler2)

# Make the black-and-white theme the default for ggplot2 plots created below.
theme_set(theme_bw())

# Fix the random seed so that any later step relying on R's RNG is repeatable.
set.seed(666)

Read and prep data

GVC

Genes within a 1 Mb window of the GVC loci (TODO: confirm whether the window extends 1 Mb on each side of a locus):

# Load the GVC 1 Mb comparison sheet and normalise its column names.
#
# `gene_id` is reduced to its leading alphanumeric part: everything from the
# first non-alphanumeric character onwards (e.g. a version suffix) is removed.
# This is the same piece that `separate()` with its default separator kept as
# `gene_id`, but it avoids the superseded `separate()` call, the throwaway
# `version` column, and the "expected 2 pieces" warnings raised for IDs with
# no suffix or with more than one delimiter.
#
# The two annotation columns that are not needed downstream are dropped.
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") |>
  clean_names() |>
  mutate(gene_id = str_remove(gene_id, "[^[:alnum:]].*$")) |>
  select(-agora_nominated_list, -opentarget_info)

# Show the cleaned GVC table.
gvc

# One row per gene ID (the first occurrence in `gvc` wins), reduced to the ID
# and symbol columns and sorted by symbol.
gvc.genes <- gvc |>
  select(gene_id, gene_symbol) |>
  distinct(gene_id, .keep_all = TRUE) |>
  arrange(gene_symbol)

gvc.genes

Agora

Gene prioritization scores from Agora:

# Parse the Agora overall-scores JSON export, letting jsonlite simplify JSON
# arrays into vectors / data frames, and hold the result as a tibble.
ago <- as_tibble(
  read_json(path = "syn25741025.overall_scores.json", simplifyVector = TRUE)
)

ago

Open Targets

Genes from Open Targets:

# Read the Open Targets associated-targets export. Cells holding the literal
# text "No data" are read as NA; the column-type message is suppressed.
ot <- read_tsv(
  file = "OT-MONDO_0004975-associated-targets-5_4_2024-v24_03.tsv",
  na = "No data",
  show_col_types = FALSE
)

ot

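Add Ensembl gene IDs by mapping the Open Targets gene symbols with g:Profiler (`gconvert`), so the table can be joined to the GVC genes by Ensembl ID: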

# Column names of the Open Targets table, kept so that exactly these columns
# (in their original order) can be re-selected after the join below.
otcols <- colnames(ot)

# Map every Open Targets gene symbol to Ensembl gene IDs with g:Profiler.
# `mthreshold = Inf` keeps all ENSG hits per symbol and `filter_na = TRUE`
# drops symbols without a hit, so one symbol may yield zero, one or several
# rows. gconvert() reports the position of the originating query element in
# `input_number`; joining that to the row number of `ot` re-attaches the Open
# Targets columns to each hit.
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) |>
  mutate(input_number = as.character(input_number)) |>
  left_join(
    ot |> rownames_to_column(var = "input_number"),
    by = "input_number"
  ) |>
  # all_of() states explicitly that `otcols` is an external character vector.
  # Passing it bare is deprecated in tidyselect and would silently select a
  # column named `otcols` if one ever existed.
  select(ensembl_gene_id = target, all_of(otcols))

otensg

Annotate GVC genes with Agora and Open Targets scores

# Number of GVC genes whose Ensembl ID appears in the Agora table.
gvc.genes |> filter(gene_id %in% ago$ensembl_gene_id) |> nrow()
[1] 1212
# Number of GVC genes whose Ensembl ID appears in the mapped Open Targets table.
gvc.genes |> filter(gene_id %in% otensg$ensembl_gene_id) |> nrow()
[1] 405

Arrange by Agora’s genetics_score and Open Targets’ otGeneticsPortal:

# Attach the Agora columns and then the Open Targets columns to every GVC gene,
# matching on Ensembl gene ID. Genes missing from a source keep NA in that
# source's columns. Rows are ordered by descending `genetics_score`, with ties
# broken by descending `otGeneticsPortal`.
# NOTE(review): `otensg` keeps every gconvert hit, so it may hold several rows
# per Ensembl ID and duplicate genes here — confirm this is intended.
d <- gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

d

Perform ORA of GVC genes sorted by Agora and Open Targets genetics scores

# Build the ranked gene list for g:Profiler: the Open Targets symbols in the
# row order of `d` (descending genetics scores), keeping the first occurrence
# of each symbol.
# GVC genes without an Open Targets match have `symbol == NA` after the left
# join; they are dropped so that NA is never submitted as a gene.
# NOTE(review): because the Open Targets `symbol` column is used, genes absent
# from Open Targets are not part of the query. Switch to `gene_symbol` if every
# GVC gene should be tested.
query <- d |>
  drop_na(symbol) |>
  distinct(symbol) |>
  pull(symbol)

# Ordered over-representation analysis against human annotations.
# Electronically inferred annotations are excluded, the background is the set
# of annotated genes, and only terms passing FDR < 0.005 are returned.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr"
)

# Result table with the term name, ID and source moved to the front.
gostres$result |> select(term_name, term_id, source, everything())

# Interactive plot of the enrichment results, without capping the p-values.
gostplot(gostres, capped = FALSE, interactive = TRUE)

Perform correlation analysis of GVC genes using Agora and Open Targets scores

# Total number of rows in the annotated table.
nrow(d)
[1] 1345
# Rows for which both genetics scores are present.
nrow(drop_na(d, genetics_score, otGeneticsPortal))
[1] 193
# Kendall rank correlation between the Agora and Open Targets genetics scores,
# computed on rows where both are present. tidy() converts the test result
# into a one-row tibble.
d |>
  drop_na(genetics_score, otGeneticsPortal) |>
  with(cor.test(genetics_score, otGeneticsPortal, method = "kendall")) |>
  tidy()
# Total number of rows in the annotated table.
nrow(d)
[1] 1345
# Rows for which both `target_risk_score` and `globalScore` are present.
nrow(drop_na(d, target_risk_score, globalScore))
[1] 399
# Kendall rank correlation between `target_risk_score` and `globalScore`,
# computed on rows where both are present. tidy() converts the test result
# into a one-row tibble.
d |>
  drop_na(target_risk_score, globalScore) |>
  with(cor.test(target_risk_score, globalScore, method = "kendall")) |>
  tidy()